---
title: "Plots and Tables"
author: "Kohei Watanabe and Marius Saeltzer"
date: "`r Sys.Date()`"
output: html_document
editor_options: 
  chunk_output_type: console
---

```{r setup, include=FALSE, message=FALSE, warning=FALSE}
# Echo chunk code by default and hide warnings in the rendered document.
knitr::opts_chunk$set(echo = TRUE, warning = FALSE)
# Attach with library() rather than require(): library() stops with an error
# when a package is missing, whereas require() only warns and returns FALSE,
# which would surface later as a "could not find function" error.
library(ROCit)
library(LSX)

#' Plot the share of rows belonging to one class within each LSS bin
#'
#' Splits `x$class == class` by `x$lss_bin` and draws the per-bin mean (the
#' proportion of rows labelled `class`) as a dashed line with points,
#' optionally with error bars.
#'
#' @param x Object exposing `class` and `lss_bin` via `$` (presumably a
#'   data.frame of classified documents; confirm against the caller).
#' @param class Label compared against `x$class`.
#' @param ci If `TRUE`, draw error bars of +/- `z` standard errors.
#' @param add If `TRUE`, overlay the series on the current plot with
#'   `lines()`; otherwise start a new plot with the bin axis, bin-size labels
#'   and horizontal grid lines.
#' @param col Colour of the series; also used as the point symbol (`pch`)
#'   when `add = FALSE`.
#' @param ... Further arguments passed to `plot()` (or `lines()` when
#'   `add = TRUE`).
#' @return `NULL`, invisibly; called for its plotting side effects.
plot_prop <- function(x, class, ci = TRUE, add = FALSE, col = 1, ...) {
    # NOTE: par() is not restored on exit; later add = TRUE calls draw into
    # the panel set up here.
    par(mar = c(3, 4, 3, 1), cex = 0.8, font.main = 1)

    # One logical vector per bin: TRUE where the row has the requested class.
    is_class <- split(x$class == class, x$lss_bin)

    # Bin sizes shown under the axis (all rows, including missing classes).
    n <- lengths(is_class)
    # Rows that actually enter mean/sd below, which both drop NAs.
    n_valid <- vapply(is_class, function(v) sum(!is.na(v)), integer(1))

    m <- vapply(is_class, mean, numeric(1), na.rm = TRUE)
    s <- vapply(is_class, stats::sd, numeric(1), na.rm = TRUE)
    # The standard error must use the same observations as `s`; dividing by
    # the NA-including bin size would understate it.
    se <- s / sqrt(n_valid)

    # Error-bar multiplier; 2.32 is close to qnorm(0.99).
    z <- 2.32

    pos <- seq_along(m)
    if (!add) {
        plot(m, type = "b", xaxt = "n", ylim = c(-0.15, 1),
             xlim = c(1 - 0.25, length(m) + 0.25),
             ylab = "Proportion", xlab = "Temporality", lty = 2, pch = col, col = col, ...)
        axis(1, pos, names(m))
    } else {
        lines(m, type = "b", xaxt = "n", lty = 2, col = col, ...)
    }
    if (ci) {
        arrows(pos, m - (z * se), pos, m + (z * se), angle = 90, code = 3,
               length = 0.05, col = col)
    }
    if (!add) {
        # Print the bin sizes just above the bottom edge of the plot region.
        text(x = pos, y = par("usr")[3], labels = n, adj = c(0.5, -1))
        abline(h = seq(0, 1, by = 0.2), col = "lightgray", lty = 3)
    }
    invisible(NULL)
}
```

## Temporality words {.tabset}

### English

```{r fig.height=5, fig.width=10, echo=FALSE}
# Inputs: the saved English LSS object and the English LIWC dictionary.
lss_en <- readRDS("lss_en.rds")
dict_en <- quanteda::dictionary(file = "dictionary/LIWC2001_English.dic")

# Seed right before plotting so the figure is reproducible across knits
# (presumably textplot_terms() samples the words it displays; confirm).
set.seed(1234)
terms_plot_en <- textplot_terms(lss_en, dict_en$Time)
terms_plot_en + ggplot2::xlab("Temporality")
```

### German

```{r fig.height=5, fig.width=10, echo=FALSE}
# Inputs: the saved German LSS object and the German LIWC dictionary.
lss_de <- readRDS("lss_de.rds")
dict_de <- quanteda::dictionary(file = "dictionary/LIWC2001_German_UTF8.dic")

# Seed right before plotting so the figure is reproducible across knits
# (presumably textplot_terms() samples the words it displays; confirm).
set.seed(1234)
terms_plot_de <- textplot_terms(lss_de, dict_de$Time)
terms_plot_de + ggplot2::xlab("Temporality")
```

## Classification accuracy {.tabset}

```{r fig.height = 7, fig.width = 10, echo=FALSE}
dat_en <- readRDS("data_lss_en.rds")
dat_de <- readRDS("data_lss_de.rds")

# ROC of the LSS score as a predictor of the "Future" class.
roc_en <- rocit(dat_en$lss, dat_en$class == "Future")
roc_de <- rocit(dat_de$lss, dat_de$class == "Future")

# Draw one ROC panel for a rocit object and return the index of the cutoff
# closest to zero, which is marked with a circle on the curve.
# The legend shows the AUC computed by rocit(); the plain mean of TPR over
# cutoffs is not the area under the curve, because the cutoffs are not evenly
# spaced along the false-positive axis.
draw_roc_panel <- function(roc) {
    plot(roc$FPR, roc$TPR, type = "l", xlab = "False positive", ylab = "True positive")
    k <- which.min(abs(roc$Cutoff))
    points(roc$FPR[k], roc$TPR[k], pch = 1)
    abline(h = seq(0, 1, by = 0.2), col = "lightgray", lty = 3)
    abline(0, 1, lty = 2) # chance diagonal
    legend("bottomright", legend = sprintf("AUC %0.2f", roc$AUC),
           box.lwd = NA, bg = NULL)
    k
}

# Top row: class proportions per LSS bin; bottom row: ROC curves.
par(mfrow = c(2, 2))
plot_prop(dat_en, "Future", ci = FALSE, col = 2, main = "English")
plot_prop(dat_en, "Present", ci = FALSE, add = TRUE)
plot_prop(dat_en, "Past", ci = FALSE, add = TRUE, col = 3)

plot_prop(dat_de, "Future", ci = FALSE, col = 2, main = "German")
plot_prop(dat_de, "Present", ci = FALSE, add = TRUE)
plot_prop(dat_de, "Past", ci = FALSE, add = TRUE, col = 3)

par(mar = c(4, 4, 1, 1), cex = 0.8, font.main = 1)
# `i` and `j` are reused by the chunks below to report FPR/TPR at cutoff ~ 0.
i <- draw_roc_panel(roc_en)
j <- draw_roc_panel(roc_de)
```

### English

```{r}
# False/true positive rates at index `i`, the English cutoff closest to zero.
cbind(FPR = roc_en$FPR[i], TPR = roc_en$TPR[i])
```

### German

```{r}
# False/true positive rates at index `j`, the German cutoff closest to zero.
cbind(FPR = roc_de$FPR[j], TPR = roc_de$TPR[j])
```

## Compare with SVM {.tabset}

### English

```{r}
dat_pred_en <- readRDS("data_predict_en.rds")
# Pairwise-complete correlations among the four columns; only the first row
# (correlations of `lss` with every column) is shown.
cor_en <- cor(
    dat_pred_en[c("lss", "Past", "Present", "Future")],
    use = "pairwise.complete.obs"
)
cor_en[1, ]
```

### German

```{r}
dat_pred_de <- readRDS("data_predict_de.rds")
# Pairwise-complete correlations among the four columns; only the first row
# (correlations of `lss` with every column) is shown.
cor_de <- cor(
    dat_pred_de[c("lss", "Past", "Present", "Future")],
    use = "pairwise.complete.obs"
)
cor_de[1, ]
```

